import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import os
import sys
import csv
from datetime import datetime

class DiscountedStockIndex:
    def __init__(self, sDiscGOPFileName):     
        self.sFileName = sDiscGOPFileName
        # read in data
        print("Started reading data")
        f2 = open(sDiscGOPFileName, "r")
        print("We have opened the file "+sDiscGOPFileName)
        self.datelist = []
        self.valuelist = []
        self.timelist = []
        self.qvsqrtvaluelist = []

        fTime = None
        fPrevValue = None
        for line in f2:
            x = line.split(',')
            # extract the date and value
            try:        
                datetime_object = datetime.strptime(x[0], '%d/%m/%Y')
            except:
                datetime_object = None
            try:
                fValue = float(x[1])
            except:
                fValue = None
            if (datetime_object is not None) and (fValue is not None):
                # convert date to a time in years
                yyyy = datetime_object.year
                mm = datetime_object.month
                dd = datetime_object.day
                if (fTime is None):
                    fTime = yyyy
                    fQV = 0
                else:
                    firstday_datetime_object = datetime(yyyy,1,1)
                    lastday_datetime_object = datetime(yyyy+1,1,1)
                    delta = datetime_object - firstday_datetime_object
                    DIY = lastday_datetime_object - firstday_datetime_object
                    fTime = yyyy + delta.days/DIY.days
                    diffsqrtvalue = math.sqrt(fValue)-math.sqrt(fPrevValue)
                    fQV = fQV + diffsqrtvalue*diffsqrtvalue
                self.datelist.append(datetime_object)
                self.valuelist.append(fValue)
                self.timelist.append(fTime)
                self.qvsqrtvaluelist.append(fQV)
                fPrevValue = fValue

    def __str__(self):
        return self.sFileName
    def RsquFromTau0(self, tau0,j):
       # we compute the R-squared coefficient based on the regression of:
       # tau(t) = log {[sqrt Sbar](t) + exp(tau0)}
       # on t for t = t0, t1, ..., tj
       #
       # firstly compute the means
       sum_t = 0.0
       sum_tau = 0.0
       for i in range(j+1):
           t = self.timelist[i]
           qv = self.qvsqrtvaluelist[i]
           tau = math.log(qv + math.exp(tau0))
           sum_t += t
           sum_tau += tau
       mean_t = sum_t/(j+1)
       mean_tau = sum_tau/(j+1)
       # secondly compute the sum of products of deviations
       S_t_tau = 0.0
       S_t_t = 0.0
       S_tau_tau = 0.0
       for i in range(j+1):
           t = self.timelist[i]
           qv = self.qvsqrtvaluelist[i]
           tau = math.log(qv + math.exp(tau0))
           S_t_t += (t-mean_t)**2
           S_t_tau += (t-mean_t)*(tau-mean_tau)
           S_tau_tau += (tau-mean_tau)**2
       Rsqu = S_t_tau**2/(S_t_t*S_tau_tau)
       return Rsqu
    def RsquSlopeInterceptFromTau0(self, tau0,j):
       # we compute the R-squared coefficient based on the regression of:
       # tau(t) = log {[sqrt Sbar](t) + exp(tau0)}
       # on t for t = t0, t1, ..., tj
       #
       # firstly compute the means
       sum_t = 0.0
       sum_tau = 0.0
       for i in range(j+1):
           t = self.timelist[i]
           qv = self.qvsqrtvaluelist[i]
           tau = math.log(qv + math.exp(tau0))
           sum_t += t
           sum_tau += tau
       mean_t = sum_t/(j+1)
       mean_tau = sum_tau/(j+1)
       # secondly compute the sum of products of deviations
       S_t_tau = 0.0
       S_t_t = 0.0
       S_tau_tau = 0.0
       for i in range(j+1):
           t = self.timelist[i]
           qv = self.qvsqrtvaluelist[i]
           tau = math.log(qv + math.exp(tau0))
           S_t_t += (t-mean_t)**2
           S_t_tau += (t-mean_t)*(tau-mean_tau)
           S_tau_tau += (tau-mean_tau)**2
       Rsqu = S_t_tau**2/(S_t_t*S_tau_tau)
       cov_t_tau = S_t_tau/(j+1)
       var_t = S_t_t/(j+1)
       Slope = cov_t_tau/var_t if var_t>0 else 0.0
       Intercept = mean_tau-mean_t*Slope       
       return [Rsqu, Slope, Intercept]   
    def GetOptimalTau0V2(self,n):
        # we compute the optimal value of tau0 which maximises the R-squared coefficient
        # based on the regression of tau(t) on t for t=t0,t1,...,tn
        # initial estimate of optimal tau0 is 0
        tau0 = -3.0
        dtau0 = 0.001
        eps = 0.0000001
        f_tau0 = self.RsquFromTau0(tau0,n)
        f_tau0_minus_dtau = self.RsquFromTau0(tau0-dtau0,n)
        f_tau0_plus_dtau = self.RsquFromTau0(tau0+dtau0,n)
        df_dtau0 = (f_tau0_plus_dtau - f_tau0_minus_dtau)/(2.0*dtau0)
        nIterations = 0
        while abs(df_dtau0)>eps and nIterations<20:
            nIterations = nIterations + 1
            d2f_dtau02 = (f_tau0_plus_dtau + f_tau0_minus_dtau - 2.0*f_tau0)/(dtau0*dtau0)
            print("tau0=",tau0,", f_tau0=",f_tau0,"df_dtau0=",df_dtau0)
            tau0 = tau0 - df_dtau0/d2f_dtau02
            f_tau0 = self.RsquFromTau0(tau0,n)
            f_tau0_minus_dtau = self.RsquFromTau0(tau0-dtau0,n)
            f_tau0_plus_dtau = self.RsquFromTau0(tau0+dtau0,n)
            df_dtau0 = (f_tau0_plus_dtau - f_tau0_minus_dtau)/(2*dtau0)
        print("nIterations = ",nIterations)
        return tau0
    def GetOptimalTau0(self,j):
        # we compute the optimal value of tau0 which maximises the R-squared coefficient
        # based on the regression of tau(t) on t for t=t0,t1,...,tj
        # initial estimate of optimal tau0 is 0
        dtau = 1.0
        tau0 = 0.0
        eps = 0.00000001
        f_tau0 = self.RsquFromTau0(tau0,j)
        bFoundMax = False
        while(not bFoundMax):
            tau2 = tau0 + dtau
            tau1 = tau0 - dtau            
            f_tau1 = self.RsquFromTau0(tau1,j)
            f_tau2 = self.RsquFromTau0(tau2,j)
            if(f_tau1>f_tau0):
                if(f_tau2>f_tau1):
                    tau0 = tau2
                    f_tau0 = f_tau2
                else:
                    tau0 = tau1
                    f_tau0 = f_tau1
            elif(f_tau2>f_tau0):
                    tau0 = tau2
                    f_tau0 = f_tau2
            else:
                dtau = dtau*0.5
                bFoundMax = dtau<eps
        return tau0
    def GetOptimalTau0SlopeIntercept(self,j):
        # we compute the optimal value of tau0 which maximises the R-squared coefficient
        # based on the regression of tau(t) on t for t=t0,t1,...,tj
        # initial estimate of optimal tau0 is 0
        dtau = 1.0
        tau0 = 0.0
        eps = 0.00000001
        [f_tau0,slope,intercept] = self.RsquSlopeInterceptFromTau0(tau0,j)
        bFoundMax = False
        while(not bFoundMax):
            tau2 = tau0 + dtau
            tau1 = tau0 - dtau            
            [f_tau1,slope1,intercept1] = self.RsquSlopeInterceptFromTau0(tau1,j)
            [f_tau2,slope2,intercept2] = self.RsquSlopeInterceptFromTau0(tau2,j)
            if(f_tau1>f_tau0):
                if(f_tau2>f_tau1):
                    tau0 = tau2
                    f_tau0 = f_tau2
                    slope = slope2
                    intercept = intercept2
                else:
                    tau0 = tau1
                    f_tau0 = f_tau1
                    slope = slope1
                    intercept = intercept1
            elif(f_tau2>f_tau0):
                    tau0 = tau2
                    f_tau0 = f_tau2
                    slope = slope2
                    intercept = intercept2
            else:
                dtau = dtau*0.5
                bFoundMax = dtau<eps
        return [tau0,slope,intercept]
    def GetListOfPaymentIndices(self,i,m,n,freq):
        # we obtain a list of indices in the list of dates "datelist" corresponding
        # to payment dates
        listPaymentIndices = []
        K = len(self.datelist)
        dtDate = self.datelist[i]
        fTime = self.timelist[i]
        iContribution=1
        iPayment=0   
        nContributions = 1
        nPayments = 0
        yr = dtDate.year
        mth = dtDate.month
        dy = dtDate.day
        iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
        yr_next = yr
        mth_next = mth + 12/freq
        dy_next = dy
        if mth_next>12:
            mth_next = mth_next - 12
            yr_next = yr_next + 1
        iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
        for j in range(i+1,K):
            iYYYYMMDD_prev = iYYYYMMDD_curr
            fTime = self.timelist[j]
            dtDate = self.datelist[j]
            iContribution=0
            iPayment=0
            iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
            if iYYYYMMDD_curr>= iYYYYMMDD_next and iYYYYMMDD_prev<iYYYYMMDD_next:
                if nContributions<m*freq:
                    # a contribution is made
                    iContribution=1
                    nContributions = nContributions+1
                elif nPayments<(n-m)*freq: 
                    # a payment is made
                    iPayment=1
                    nPayments = nPayments+1
                    listPaymentIndices.append(j)
                # update the next contribution/payment date
                yr_next = yr_next
                mth_next = mth_next + 12/freq
                dy_next = dy_next
                if mth_next>12:
                    mth_next = mth_next - 12
                    yr_next = yr_next + 1
                iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
        return listPaymentIndices
    def GetListOfContributionIndices(self,i,m,n,freq):
        # we obtain a list of indices in the list of dates "datelist" corresponding
        # to contribution dates
        listContributionIndices = []
        K = len(self.datelist)
        dtDate = self.datelist[i]
        fTime = self.timelist[i]
        iContribution=1
        iPayment=0   
        nContributions = 1
        listContributionIndices.append(i)
        nPayments = 0
        yr = dtDate.year
        mth = dtDate.month
        dy = dtDate.day
        iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
        yr_next = yr
        mth_next = mth + 12/freq
        dy_next = dy
        if mth_next>12:
            mth_next = mth_next - 12
            yr_next = yr_next + 1
        iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
        for j in range(i+1,K):
            iYYYYMMDD_prev = iYYYYMMDD_curr
            fTime = self.timelist[j]
            dtDate = self.datelist[j]
            iContribution=0
            iPayment=0
            iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
            if iYYYYMMDD_curr>= iYYYYMMDD_next and iYYYYMMDD_prev<iYYYYMMDD_next:
                if nContributions<m*freq:
                    # a contribution is made
                    iContribution=1
                    nContributions = nContributions+1
                    listContributionIndices.append(j)
                elif nPayments<(n-m)*freq: 
                    # a payment is made
                    iPayment=1
                    nPayments = nPayments+1
                    #listPaymentIndices.append(j)
                # update the next contribution/payment date
                yr_next = yr_next
                mth_next = mth_next + 12/freq
                dy_next = dy_next
                if mth_next>12:
                    mth_next = mth_next - 12
                    yr_next = yr_next + 1
                iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
        return listContributionIndices
    def SimulateAccumulationDecumulation5(self,i,m,n,freq):
        # we compute the retirement drawdown profile of life who commences contributions
        # at t0 = t(i), retires at time t0+m years, dies at time t0+n years.
        # 1. check if there are improper arguments
        if n<=m:
            print("Error: you must have n > m.")
            sys.exit()
        if m<=0:
            print("Error: you must have m>0.")
            sys.exit()
        if not(freq in [1,2,3,4,6,12]):
            print("Error: you must have freq in {1,2,3,4,6,12}.")
            sys.exit()
        K = len(self.datelist)
        dtDate = self.datelist[i]
        fTime = self.timelist[i]
        iContribution=1
        iPayment=0
        nContributions = 1
        elli = nContributions - 1
        nPayments = 0
        [tau0,slope,intercept] = self.GetOptimalTau0SlopeIntercept(i)
        tau_ti = math.log(math.exp(tau0)+self.qvsqrtvaluelist[i])
        listPmtIndices = self.GetListOfPaymentIndices(i, m, n, freq)
        print("listPmtIndices")
        print(listPmtIndices)
        nPmtIndices = len(listPmtIndices)
        listContIndices = self.GetListOfContributionIndices(i, m, n, freq)
        nContIndices = len(listContIndices)
        print("listContIndices")
        print(listContIndices)
        if nPmtIndices<(n-m)*freq:
            print("Error: you must have a sufficiently long data series for your values of i, m, n.")
            sys.exit()
        # compute the values of AZCBs
        Z_ti_tj = [[0 for ellj in range(nPmtIndices)] for elli in range(nContIndices)]
        N_ti_tj = [[0 for ellj in range(nPmtIndices)] for elli in range(nContIndices)]
        elli = 0
        for ellj in range(nPmtIndices):
            ell = listPmtIndices[ellj]
            tj = self.timelist[ell]
            taubar_tj = slope*tj+intercept
            P_ti_tj = 1 - math.exp(-0.5*self.valuelist[i]/(math.exp(taubar_tj)-math.exp(tau_ti)))
            Z_ti_tj[elli][ellj] = P_ti_tj
        sum_Z_ti_tj = sum(Z_ti_tj[elli])
        fCashIn = 1/freq
        N_ti_tj[elli] = [fCashIn/sum_Z_ti_tj for ellj in range(nPmtIndices)]
        W_ti = fCashIn
        WBN_ti = sum(x*y for x,y in zip(N_ti_tj[elli],Z_ti_tj[elli]))
        fSbar_ti = self.valuelist[i]
        listDates = []
        listWealthRN = []
        listWealthBN = []
        listDD_RN = []
        listDD_BN = []
        # write the header
        f = open('BacktestsFigures10and11_m'+str(m)+'_n'+str(n)+'_freq'+str(freq)+'_'+sDiscGOPFileName,'w')
        line_header = "Index,Date,Time ti,Sbar(ti),Contribution,Payment,#Contributions,#Payments,tau0,slope,intercept,tau(ti),Cash Inflow,Cash Outflow(RN),Wealth(RN)"
        line_header = line_header + ",Cash Outflow(BN),Wealth(BN)\n"
        f.write(line_header)
        
        # now write the first line
        line_format_string = "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}"
        line = line_format_string.format(i,dtDate,fTime,fSbar_ti,iContribution,iPayment,nContributions,nPayments,tau0,slope,intercept,tau_ti,fCashIn,0,W_ti)
        f.write(line)
        line_format_string = ",{},{}\n"
        line = line_format_string.format(0,WBN_ti)
        f.write(line)
        # end of writing the first line
        listDates.append(dtDate)
        listWealthRN.append(W_ti)
        listWealthBN.append(WBN_ti)
        yr = dtDate.year
        mth = dtDate.month
        dy = dtDate.day
        iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
        yr_next = yr
        mth_next = mth + 12/freq
        dy_next = dy
        if mth_next>12:
            mth_next = mth_next - 12
            yr_next = yr_next + 1
        iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
        # now loop through the remaining times
        for j in range(i+1,K):
            iYYYYMMDD_prev = iYYYYMMDD_curr
            fSbar_ti_prev = fSbar_ti
            fTime = self.timelist[j]
            dtDate = self.datelist[j]
            fSbar_ti = self.valuelist[j]
            # update Z_ti_tj
            for elli in range(nContIndices):
                for ellj in range(nPmtIndices):
                    if Z_ti_tj[elli][ellj]>0:
                        pi_ti_prev_tj = (1-1/Z_ti_tj[elli][ellj])*math.log(1-Z_ti_tj[elli][ellj]) if Z_ti_tj[elli][ellj]<1 else 0
                        Z_ti_tj[elli][ellj] = Z_ti_tj[elli][ellj]*(1+pi_ti_prev_tj*(fSbar_ti/fSbar_ti_prev-1))
            iContribution=0
            iPayment=0
            iYYYYMMDD_curr = dtDate.year *10000 + dtDate.month * 100 + dtDate.day
            tau0=0
            fCashIn = 0
            fCashOutRN = 0 
            fCashOutBN = 0
            if iYYYYMMDD_curr>= iYYYYMMDD_next and iYYYYMMDD_prev<iYYYYMMDD_next:
                if nContributions<m*freq:
                    # a contribution is made
                    iContribution=1
                    nContributions = nContributions+1
                    [tau0,slope,intercept] = self.GetOptimalTau0SlopeIntercept(j)
                    tau_ti = math.log(math.exp(tau0)+self.qvsqrtvaluelist[j])
                    # compute the AZCBs
                    elli = nContributions-1
                    for ellj in range(nPmtIndices):
                        ell = listPmtIndices[ellj]
                        tj = self.timelist[ell]
                        taubar_tj = slope*tj+intercept
                        P_ti_tj = 1 - math.exp(-0.5*self.valuelist[j]/(math.exp(taubar_tj)-math.exp(tau_ti)))
                        Z_ti_tj[elli][ellj] = P_ti_tj
                    #
                    # update N_ti_tj
                    fCashIn = 1/freq
                    sum_Z_ti_tj = sum(Z_ti_tj[elli])
                    N_ti_tj[elli] = [fCashIn/sum_Z_ti_tj for ellj in range(nPmtIndices)]
                    
                    W_ti = W_ti + fCashIn
                    WBN_ti = 0 
                    for elli in range(nContIndices):
                        WBN_ti = WBN_ti+sum(x*y for x,y in zip(N_ti_tj[elli],Z_ti_tj[elli]))
                    print("Done ",nContributions," contributions of total of ",m*freq)
                elif nPayments<(n-m)*freq: 
                    # a payment is made
                    iPayment=1
                    nPayments = nPayments+1
                    fCashOutBN = 0
                    for elli in range(nContIndices):
                        for ellj in range(nPmtIndices):
                            if j==listPmtIndices[ellj]:
                                fCashOutBN = fCashOutBN+N_ti_tj[elli][ellj]*Z_ti_tj[elli][ellj]                                
                                N_ti_tj[elli][ellj] = 0
                    print("fCashOutBN=",fCashOutBN)
                    listDD_BN.append(fCashOutBN)
                    fCashOutRN = m/(n-m)/freq
                    listDD_RN.append(fCashOutRN)
                    W_ti = W_ti - fCashOutRN
                    WBN_ti = 0 
                    for elli in range(nContIndices):
                        WBN_ti = WBN_ti+sum(x*y for x,y in zip(N_ti_tj[elli],Z_ti_tj[elli]))
                    print("Done ",nPayments," payments of total of ",(n-m)*freq)
                # update the next contribution/payment date
                yr_next = yr_next
                mth_next = mth_next + 12/freq
                dy_next = dy_next
                if mth_next>12:
                    mth_next = mth_next - 12
                    yr_next = yr_next + 1
                iYYYYMMDD_next = yr_next*10000+mth_next*100+dy_next
            # now write the j-th line
            line_format_string = "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}"
            line = line_format_string.format(i,dtDate,fTime,fSbar_ti,iContribution,iPayment,nContributions if iContribution>0 else 0,nPayments if iPayment>0 else 0,tau0 if iContribution>0 else "",slope if iContribution>0 else "",intercept if iContribution>0 else "",tau_ti if iContribution>0 else "",fCashIn,fCashOutRN,W_ti)
            f.write(line)
            line_format_string = ",{},{}\n"
            line = line_format_string.format(fCashOutBN,WBN_ti)
            f.write(line)
            # end of writing the j-th line
            listDates.append(dtDate)
            listWealthRN.append(W_ti)
            listWealthBN.append(WBN_ti)
        f.close()
        AvgDD_RN = round(sum(listDD_RN)/(n-m),2)
        AvgDD_BN = round(sum(listDD_BN)/(n-m),2)
        fPctImprovement = round((sum(listDD_BN)/sum(listDD_RN) - 1)*100,2)
        plt.plot(listDates, listWealthRN, color='r', label='RN'+" (Avg DD "+str(AvgDD_RN)+" p.a.)")
        plt.plot(listDates, listWealthBN, color='b', label='BN'+" (Avg DD "+str(AvgDD_BN)+" p.a.)")
        plt.xlabel("Date")
        plt.ylabel("Wealth")
        plt.title("Comparison of Accumulated and Decumulated Wealth Processes (Improvement "+str(fPctImprovement)+"%)")
        plt.legend()
        plt.show()        
# Main program
datetime_start = datetime.now()
print("Started at ",datetime_start)

# Firstly, define the date file name.
# Please remove the comment symbol # for the appropriate csv file used for the backtest.
#
sDiscGOPFileName = "DiscountedStockIndexSince1934.csv"

DiscGOP_Data = DiscountedStockIndex(sDiscGOPFileName)







DiscGOP_Data.SimulateAccumulationDecumulation5(4000, 45, 75, 2)


datetime_end = datetime.now()
print("Ended at ",datetime_end," and time elapsed = ",datetime_end-datetime_start)

